/* Replication Code:
 Do Survey Experiments Capture Real-World Behavior? 
 External Validation of Conjoint and Vignette Analyses with a Natural Experiment
 Jens Hainmueller, Dominik Hangartner, Teppei Yamamoto
*/
* Start from an empty session and switch off output paging
clear all
set more off

* Load the replication data
use repdata.dta, clear

* Restrict the sample to the target population:
* only observations with regular==1 and voted==1 are kept
keep if regular==1 & voted==1
 
* Helper program: collect the results of the most recent estimation command.
* On exit the Stata matrix coef holds one row per coefficient, with the point
* estimate in column 1 and the standard error in column 2. The last row of
* e(b) (the constant in the regressions of this file) is removed.
* Side effects: also leaves the matrices varr and se, the scalar R and the
* Mata global A behind.
capture program drop adjustmat
program define adjustmat
    capture matrix drop resmat

    * standard errors = square roots of the diagonal of e(V); the element-wise
    * square root is taken in Mata
    matrix varr = vecdiag(e(V))
    mata: A = st_matrix("varr")
    mata: st_matrix("se", sqrt(A))

    * stack estimates and standard errors side by side as column vectors
    matrix coef = e(b)
    matrix coef = coef' , se'

    * keep every row except the last one
    scalar R = rowsof(coef) - 1
    matrix coef = coef[1..R, 1..2]
end

* Keep a copy of the original weights: the weights loop below overwrites wgt
* with 1 for its unweighted pass, and the copy is used later to restore wgt.
* The copy is stored as double because generate defaults to float, which would
* round the weights when they are restored if wgt is held in double precision.
gen double wgtsave = wgt

* run with and without weights 
local weights "wyes wno"
 foreach w of local weights {
 
 * Unweighted pass: overwrite the weights with a constant so that the
 * pweight specifications below no longer reweight the observations
 if "`w'" == "wno" {
     replace wgt = 1
 }
      
** Attribute Effects in Actual and Hypothetical Naturalization Referendums
* Main regressions: one weighted regression per value of mode (1 to 7) with
* standard errors clustered on ID. Each fit is stored as m1 ... m7 and its
* coefficients and standard errors (via adjustmat) are written to a text file
* whose name carries the weights tag and the mode number.
forvalues i = 1/7 {
    reg Y i.gender i.origin i.age i.ysince i.educ i.integ i.lang i.munic i.period ///
        if mode==`i' [pweight=wgt], cl(ID)
    est store m`i'
    adjustmat
    mat2txt , matrix(coef) saving("fig1level`w'`i'.txt")  replace
}

* Regression table of all seven models (column order 1 3 2 4 5 6 7).
* The title spelling is corrected (Actual, Naturalization).
esttab m1 m3 m2 m4 m5 m6 m7 using "tableS3`w'.tex", ///
    label replace title(Attribute Effects in Actual and Hypothetical Naturalization Referendums \label{Tab1}) ///
    se(a2) star(* 0.05 ** 0.01) nogaps b(a2)

** Differences in Effects of Applicant Attributes: Survey versus Behavioral Estimates (Table 1 and Figure S9)
* Pooled model in which every applicant attribute is interacted with mode.
* Stored estimates and all matrices are cleared first, so the matrices that
* are accumulated with nullmat() below start out empty.
estimates clear
matrix drop _all
quietly reg Y i.gender##i.mode i.origin##i.mode i.age##i.mode i.ysince##i.mode ///
    i.educ##i.mode i.integ##i.mode i.lang##i.mode i.munic i.period [pweight=wgt], cl(ID)

* Attribute levels whose interaction with mode is extracted
* (entries written 1b.* are the base levels of the attributes)
local vars "1b.gender 2.gender 1b.origin 2.origin 3.origin 4.origin  5.origin  6.origin  7.origin  8.origin  1b.age 2.age 3.age 4.age 1b.ysince 2.ysince 3.ysince 4.ysince 1b.educ 2.educ 3.educ 1b.integ 2.integ 3.integ 4.integ 1b.lang 2.lang 3.lang"

* For each mode 2 to 7: stack estimate and standard error of every
* attribute-by-mode interaction into the matrix rrr, write it to a text file,
* and copy it into the data as the variables rrr<mode>pe and rrr<mode>se
forvalues i = 2/7 {
    foreach x of local vars {
        lincom `x'#`i'.mode
        matrix temp = r(estimate) , r(se)
        matrix rownames temp = "`x'"
        matrix rrr = nullmat(rrr) \ temp
    }
    matrix colnames rrr = pe se
    mat2txt , matrix(rrr) saving("fig1delta`w'`i'.txt")  replace
    svmat rrr
    rename rrr1 rrr`i'pe
    rename rrr2 rrr`i'se
    * rrr must not exist when the next mode starts accumulating
    matrix drop rrr
}

* Per-mode summaries of the interaction estimates stored in rrr<mode>pe and rrr<mode>se
forvalues i = 2/7 {
    * absolute difference
    generate diff`i' = abs(rrr`i'pe)
    * NOTE(review): this replace sets missing values to missing and therefore
    * cannot change anything; kept as in the original
    replace diff`i' = . if diff`i'==.

    * absolute t value (estimate divided by its standard error)
    generate tdiff`i' = abs(rrr`i'pe / rrr`i'se)
    * NOTE(review): same no-op pattern as above
    replace tdiff`i' = . if tdiff`i'==.

    * two-sided p value from a t distribution with 1000 degrees of freedom
    generate pval`i' = 2*ttail(1000, tdiff`i')

    * unadjusted significance flag: absolute t value above 1.96
    generate sigtdiff`i' = tdiff`i' > 1.96 if tdiff`i' != .

    * multiple testing adjustment (Bonferroni); the rejection flag is stored in adjsigtdiff<mode>
    quietly multproc , me(bonferroni)  reject(adjsigtdiff`i') pvalue(pval`i')
}

* Build the matrix results with one row per survey mode (2 to 7).
* Columns 1-3: mean, median and maximum of the absolute differences
tabstat diff* , s(mean med max) save
matrix results = r(StatTotal)'

* Column 4: number of unadjusted significant differences
tabstat sigtdiff* , s(sum) save
matrix results = results , r(StatTotal)'

* Column 5: number of significant differences after the Bonferroni adjustment
tabstat adjsigtdiff* , s(sum) save
matrix results = results , r(StatTotal)'

* Column 6: F statistic of the joint test that all attribute-by-mode
* interactions of the given mode are zero
forvalues i = 2/7 {
    quietly test 2.gender#`i'.mode = ///
        2.origin#`i'.mode = 3.origin#`i'.mode = 4.origin#`i'.mode = 5.origin#`i'.mode = ///
        6.origin#`i'.mode = 7.origin#`i'.mode = 8.origin#`i'.mode = ///
        2.age#`i'.mode = 3.age#`i'.mode = 4.age#`i'.mode = ///
        2.ysince#`i'.mode = 3.ysince#`i'.mode = 4.ysince#`i'.mode = ///
        2.educ#`i'.mode = 3.educ#`i'.mode = ///
        2.integ#`i'.mode = 3.integ#`i'.mode = 4.integ#`i'.mode = ///
        2.lang#`i'.mode = 3.lang#`i'.mode = 0
    matrix ff = nullmat(ff) , r(F)
}
matrix results = results , ff'

* Cor(Y,Yhat) and Cor(Yhat_b,Yhat_s)
* Preparation: indicator variables for ysince, educ, integ and lang, named
* dd<attribute><k> for the k-th category of the attribute
foreach x of varlist ysince educ integ  lang {
    quietly tabulate `x', generate(dd`x')
}

* Amend the mode 1 data. For ysince and integ the last indicator is number 5,
* for educ and lang it is number 4.
* NOTE(review): the last category presumably marks missing attribute
* information in the mode 1 data; confirm against the codebook.
* For mode 1 rows whose last indicator equals 1, every other indicator is
* replaced by its mean over the mode 1 rows whose last indicator equals 0.
* egen mode() is used to spread that single mean value over all mode 1 rows.
foreach grp in "5 ysince integ" "4 educ lang" {
    gettoken last attrs : grp
    local top = `last' - 1
    foreach x of varlist `attrs' {
        forvalues i = 1/`top' {
            egen meandd`x'`i' = mean(dd`x'`i')     if mode==1 & dd`x'`last'==0
            egen modedd`x'`i' = mode(meandd`x'`i') if mode==1
            replace dd`x'`i' = modedd`x'`i'        if mode==1 & dd`x'`last'==1
            drop modedd`x'`i' meandd`x'`i'
        }
    }
}

* the indicators of the last categories are no longer needed
drop  ddysince5 ddeduc4 ddinteg5 ddlang4

* Fitted values: the model is estimated separately on the data of each mode,
* but predictions are kept only for the mode 1 observations
generate raw = Y if mode==1
forvalues i = 1/7 {
    quietly {
        regress Y i.gender i.origin i.age ///
            ddysince2 ddysince3 ddysince4 ddeduc2 ddeduc3 ///
            ddinteg2 ddinteg3 ddinteg4 ddlang2 ddlang3 ///
            if mode==`i' [pweight=wgt], cl(ID)
        predict fitted`i'
        replace fitted`i' = . if mode!=1
        * prediction minus observed outcome for the mode 1 observations
        generate delta`i' = fitted`i' - Y if mode==1
    }
}

* Estimated Average Rejection Rate for the Applicants with Naturalization Referendums
* (mean fitted value per model, written to a text file)
tabstat fitted* , s(mean) save
matrix deltarejectrate = r(StatTotal)'
matrix rownames deltarejectrate = behav forcedC pairedC  pairedV singleC singleV studentsC
mat2txt , matrix(deltarejectrate) saving("tableS6`w'.txt")  replace

* Correlation matrix of raw and fitted1 ... fitted7 (row and column 1 = raw,
* row and column 2 = fitted1, rows 3 to 8 = fitted2 ... fitted7)
quietly correlate raw fitted*
matrix temp = r(C)

* Columns 7 and 8 of results: correlation of each survey-based fit with raw
* and with fitted1
matrix temp1 = temp[3..8,1]
matrix temp1 = temp1 , temp[3..8,2]
matrix results = results , temp1

* correlation between fitted1 and raw
scalar rawbehav = temp[2,1]

* Copy results into the data (results1 ... results8) and add the row labels
* as results0 in observations 1 to 6
svmat results
generate results0 = ""
local row = 0
foreach lab in "Paired Conjoint, FC" "Paired Conjoint" "Paired Vignette" "Single Conjoint" "Single Vignette" "Paired Conjoint, FC (Students)" {
    local ++row
    replace results0 = "`lab'" in `row'
}

* Write Table 1 as a LaTeX fragment. A text log named after the weights tag is
* opened and only the lines printed with noisily display end up in it, because
* everything else inside the quietly block is silenced.
quietly {

	* close any log that is still open, widen the line so rows are not wrapped
	cap log close
	set linesize 255
	log using "table1`w'.tex", replace text
	noisily display "\begin{table}[ht]"
	noisily display "\centering"
	noisily display "\footnotesize"
	noisily display "\caption{Differences in Effects of Applicant Attributes: Survey versus Behavioral Estimates \label{tab:relativeperformance}}"

	* table header
	noisily display "\begin{tabular}{lccc|cc|c|cc}"
	noisily display "\hline \hline"
	noisily display "& \multicolumn{3}{c|}{Absolute Differences}  & \multicolumn{2}{c|}{Sig. Diffs}  & Joint & & \\ "

	* the first dollar sign is escaped with a backslash so that it is not
	* read as the start of a global macro reference
	noisily display "Design:         & mean & median & max & raw & adj & F-test & Cor(\$Y,\hat{Y}$) & Cor($\hat{Y}_b,\hat{Y}_s$) \\ "
	noisily display "\hline"
		
	* one table row per design, taken from observations 2 1 3 4 5 6 (in that
	* order) of results0 ... results8; the denominator 21 is hard-coded
	foreach j of numlist 2 1 3 4 5 6 {
	noisily display results0[`j'] "&" %9.2f results1[`j'] " & " %9.2f results2[`j'] " & " %9.2f results3[`j'] " & " %3.0f results4[`j'] "/21 & " %3.0f results5[`j'] "/21 & " %9.2f results6[`j'] " & " %9.2f results7[`j'] " & " %9.2f results8[`j']  " \\ "
	}

	* final row: only the scalar rawbehav is filled in
	noisily display "\hline Behavioral" "&" " & "  " & "  " & " " & " "&" " & " %9.2f rawbehav  " & "   " \\ "
	noisily display "\hline \hline"
	noisily display "\end{tabular}"
	noisily display "\end{table}"

  	log close
}

* remove every helper variable created in this pass so that the next pass can
* create them again (note that the patterns are wildcards)
drop rrr* delta* diff* tdiff* sigtdiff* dd* fitted* results* raw pval* adjsigtdiff*

} /* loop over weights yes or no */

******************************
* Figure 2
* Works on a temporary copy of the data: task number 10 and modes 1, 2 and 7
* are removed, the original data come back with restore.
preserve
drop if taskno==10 | inlist(mode, 1, 2, 7)

* allaccept equals 1 when the mean of Y within ID is exactly zero
egen numberreject = mean(Y), by(ID)
generate allaccept = (numberreject==0)

regress allaccept i.mode , cl(ID)

* Row 1: constant times 100, i.e. the base category of mode among the
* remaining observations (presumably mode 3; confirm against the data)
lincom 100*_cons
matrix res = r(estimate) , r(se)

* Rows 2 to 4: modes 4, 5 and 6, again multiplied by 100
forvalues i = 4/6 {
    lincom 100*(_cons + `i'.mode)
    matrix res = res \ r(estimate) , r(se)
}

matrix colnames res = pe se
matrix rownames res = 1 2 3 4
matlist res
mat2txt , matrix(res) saving("fig2.txt")  replace
restore

******************************
** S11: with aggregated origin
* put the saved weights back (the weights loop ends with wgt set to 1)
replace wgt = wgtsave

** Estimations for
** Attribute Effects in Actual and Hypothetical Naturalization Referendums
* Same per-mode regressions as in the main analysis, with originR in place of
* origin; coefficients and standard errors go to one text file per mode
forvalues i = 1/7 {
    reg Y i.gender i.originR i.age i.ysince i.educ i.integ i.lang i.munic i.period ///
        if mode==`i' [pweight=wgt], cl(ID)
    est store m`i'
    adjustmat
    mat2txt , matrix(coef) saving("figS11`i'.txt")  replace
}
 
* differences
* Pooled model with every attribute (originR instead of origin) interacted
* with mode; stored estimates and all matrices are cleared beforehand
estimates clear
matrix drop _all
quietly reg Y i.gender##i.mode i.originR##i.mode i.age##i.mode i.ysince##i.mode ///
    i.educ##i.mode i.integ##i.mode i.lang##i.mode i.munic i.period [pweight=wgt], cl(ID)

* attribute levels whose interaction with mode is extracted
* (entries written 1b.* are the base levels of the attributes)
local vars "1b.gender 2.gender 1b.originR 2.originR 3.originR 4.originR 1b.age 2.age 3.age 4.age 1b.ysince 2.ysince 3.ysince 4.ysince 1b.educ 2.educ 3.educ 1b.integ 2.integ 3.integ 4.integ 1b.lang 2.lang 3.lang"

* For each mode 2 to 7: stack estimate and standard error of every
* interaction into rrr and copy it into the data as rrr<mode>pe and rrr<mode>se
forvalues i = 2/7 {
    foreach x of local vars {
        quietly lincom `x'#`i'.mode
        matrix temp = r(estimate) , r(se)
        matrix rownames temp = "`x'"
        matrix rrr = nullmat(rrr) \ temp
    }
    matrix colnames rrr = pe se
    svmat rrr
    rename rrr1 rrr`i'pe
    rename rrr2 rrr`i'se
    * rrr must not exist when the next mode starts accumulating
    matrix drop rrr
}

* Per-mode summaries of the interaction estimates stored in rrr<mode>pe and rrr<mode>se
forvalues i = 2/7 {
    * absolute difference
    generate diff`i' = abs(rrr`i'pe)
    * NOTE(review): this replace sets missing values to missing and therefore
    * cannot change anything; kept as in the original
    replace diff`i' = . if diff`i'==.

    * absolute t value (estimate divided by its standard error)
    generate tdiff`i' = abs(rrr`i'pe / rrr`i'se)
    * NOTE(review): same no-op pattern as above
    replace tdiff`i' = . if tdiff`i'==.

    * two-sided p value from a t distribution with 1000 degrees of freedom
    generate pval`i' = 2*ttail(1000, tdiff`i')

    * unadjusted significance flag: absolute t value above 1.96
    generate sigtdiff`i' = tdiff`i' > 1.96 if tdiff`i' != .

    * Bonferroni adjustment, then a cross-tabulation of raw against adjusted flags
    multproc , me(bonferroni)  reject(adjsigtdiff`i') pvalue(pval`i')
    tabulate sigtdiff`i' adjsigtdiff`i'
}

* Build the matrix results with one row per survey mode (2 to 7).
* Columns 1-3: mean, median and maximum of the absolute differences
tabstat diff* , s(mean med max) save
matrix results = r(StatTotal)'

* Column 4: number of unadjusted significant differences
tabstat sigtdiff* , s(sum) save
matrix results = results , r(StatTotal)'

* Column 5: number of significant differences after the Bonferroni adjustment
tabstat adjsigtdiff* , s(sum) save
matrix results = results , r(StatTotal)'

* Column 6: F statistic of the joint test that all attribute-by-mode
* interactions of the given mode are zero
forvalues i = 2/7 {
    quietly test 2.gender#`i'.mode = ///
        2.originR#`i'.mode = 3.originR#`i'.mode = 4.originR#`i'.mode = ///
        2.age#`i'.mode = 3.age#`i'.mode = 4.age#`i'.mode = ///
        2.ysince#`i'.mode = 3.ysince#`i'.mode = 4.ysince#`i'.mode = ///
        2.educ#`i'.mode = 3.educ#`i'.mode = ///
        2.integ#`i'.mode = 3.integ#`i'.mode = 4.integ#`i'.mode = ///
        2.lang#`i'.mode = 3.lang#`i'.mode = 0
    matrix ff = nullmat(ff) , r(F)
}

matrix results = results , ff'

* Indicator variables for ysince, educ, integ and lang, named
* dd<attribute><k> for the k-th category of the attribute
foreach x of varlist ysince educ integ lang {
    quietly tabulate `x', generate(dd`x')
}

* Amend the mode 1 data. For ysince and integ the last indicator is number 5,
* for educ and lang it is number 4.
* NOTE(review): the last category presumably marks missing attribute
* information in the mode 1 data; confirm against the codebook.
* For mode 1 rows whose last indicator equals 1, every other indicator is
* replaced by its mean over the mode 1 rows whose last indicator equals 0.
* egen mode() is used to spread that single mean value over all mode 1 rows.
foreach grp in "5 ysince integ" "4 educ lang" {
    gettoken last attrs : grp
    local top = `last' - 1
    foreach x of varlist `attrs' {
        forvalues i = 1/`top' {
            egen meandd`x'`i' = mean(dd`x'`i')     if mode==1 & dd`x'`last'==0
            egen modedd`x'`i' = mode(meandd`x'`i') if mode==1
            replace dd`x'`i' = modedd`x'`i'        if mode==1 & dd`x'`last'==1
            drop modedd`x'`i' meandd`x'`i'
        }
    }
}

* the indicators of the last categories are no longer needed
drop  ddysince5 ddeduc4  ddinteg5 ddlang4

* Fitted values: the model (with originR) is estimated separately on the data
* of each mode, but predictions are kept only for the mode 1 observations
generate raw = Y if mode==1
forvalues i = 1/7 {
    quietly regress Y i.gender i.originR i.age ///
        ddysince2 ddysince3 ddysince4 ddeduc2 ddeduc3 ///
        ddinteg2 ddinteg3 ddinteg4 ddlang2 ddlang3 ///
        if mode==`i' [pweight=wgt], cl(ID)
    predict fitted`i'
    replace fitted`i' = . if mode!=1
    * prediction minus observed outcome for the mode 1 observations
    generate delta`i' = fitted`i' - Y if mode==1
}

* correlations
* Correlation matrix of raw and fitted1 ... fitted7 (row and column 1 = raw,
* row and column 2 = fitted1, rows 3 to 8 = fitted2 ... fitted7)
correlate raw fitted*
matrix temp = r(C)

* Columns 7 and 8 of results: correlation of each survey-based fit with raw
* and with fitted1
matrix temp1 = temp[3..8,1]
matrix temp1 = temp1 , temp[3..8,2]
matrix results = results , temp1

* correlation fitted and raw (fitted1 with raw)
scalar rawbehav = temp[2,1]

* Copy results into the data (results1 ... results8) and add the row labels
* as results0 in observations 1 to 6
svmat results
generate results0 = ""
local row = 0
foreach lab in "Paired Conjoint, FC" "Paired Conjoint" "Paired Vignette" "Single Conjoint" "Single Vignette" "Paired Conjoint, FC (Students)" {
    local ++row
    replace results0 = "`lab'" in `row'
}

* Write Table S5 as a LaTeX fragment. A text log is opened and only the lines
* printed with noisily display end up in it, because everything else inside
* the quietly block is silenced.
quietly {
	* close any log that is still open, widen the line so rows are not wrapped
	cap log close
	set linesize 255
	log using "tableS5.tex", replace text

	noisily display "\begin{table}[ht]"
	noisily display "\centering"
	noisily display "\footnotesize"
	noisily display "\caption{Differences in Effects of Applicant Attributes: Survey versus Behavioral Estimates \label{tab:relativeperformance}}"

	* table header
	noisily display "\begin{tabular}{lccc|cc|c|cc}"
	noisily display "\hline \hline"
	noisily display "& \multicolumn{3}{c|}{Absolute Differences}  & \multicolumn{2}{c|}{Sig. Diffs}  & Joint & & \\ "

	* the first dollar sign is escaped with a backslash so that it is not
	* read as the start of a global macro reference
	noisily display "Design:         & mean & median & max & raw & adj & F-test & Cor(\$Y,\hat{Y}$) & Cor($\hat{Y}_b,\hat{Y}_s$) \\ "
	noisily display "\hline"
	
	
	* one table row per design, taken from observations 2 1 3 4 5 6 (in that
	* order) of results0 ... results8; the denominator 17 is hard-coded
	foreach j of numlist 2 1 3 4 5 6 {
	noisily display results0[`j'] "&" %9.2f results1[`j'] " & " %9.2f results2[`j'] " & " %9.2f results3[`j'] " & " %3.0f results4[`j'] "/17 & " %3.0f results5[`j'] "/17 & " %9.2f results6[`j'] " & " %9.2f results7[`j'] " & " %9.2f results8[`j']  " \\ "
	}

	* final row: only the scalar rawbehav is filled in
	noisily display "\hline Behavioral" "&" " & "  " & "  " & " " & " "&" " & " %9.2f rawbehav  " & "   " \\ "
	
	noisily display "\hline \hline"
	noisily display "\end{tabular}"
	noisily display "\end{table}"

  	log close
}

* remove every helper variable created for this table (note that the
* patterns are wildcards)
drop rrr* delta* diff* tdiff* sigtdiff* dd* fitted* results* raw pval* adjsigtdiff*

**************************
* median response time
* Works on a temporary copy of the data without modes 1 and 7
preserve
drop if inlist(mode, 1, 7)

* first the mean of tasktime_submit per mode, task number and ID,
* then the sum of those task-level values per mode and ID
collapse (mean) tasktime_submit , by(mode taskno ID)
collapse (sum) tasktime_submit , by(mode ID)

* median of the per-ID totals by mode: all modes, then modes above 4 and
* modes up to 4 separately
tabstat tasktime_submit , by(mode) s(median)
tabstat tasktime_submit if mode>4 , by(mode) s(median)
tabstat tasktime_submit if mode<=4 , by(mode) s(median)

restore

**************************
* S12
* Regression of resp_toolong on mode indicators, standard errors clustered on ID
regress resp_toolong i.mode , cl(ID)

* Row 1: the constant, i.e. the base category of mode in the estimation sample.
* NOTE(review): which mode is the base depends on where resp_toolong is
* observed; confirm that rows 1 to 5 correspond to the intended modes.
lincom _cons
matrix res = r(estimate) , r(se)

* Rows 2 to 5: constant plus the coefficient of modes 2, 3, 4 and 5
forvalues i = 2/5 {
    quietly lincom _cons + `i'.mode
    matrix res = res \ r(estimate) , r(se)
}

matrix colnames res = pe se
matrix rownames res = 1 2 3 4 5
matlist res
mat2txt , matrix(res) saving("S12.txt")  replace
**************************
* S13
* Regression of resp_complicated on mode indicators, standard errors clustered on ID
regress resp_complicated i.mode , cl(ID)

* Row 1: the constant, i.e. the base category of mode in the estimation sample.
* NOTE(review): which mode is the base depends on where resp_complicated is
* observed; confirm that rows 1 to 5 correspond to the intended modes.
lincom _cons
matrix res = r(estimate) , r(se)

* Rows 2 to 5: constant plus the coefficient of modes 2, 3, 4 and 5
forvalues i = 2/5 {
    quietly lincom _cons + `i'.mode
    matrix res = res \ r(estimate) , r(se)
}

matrix colnames res = pe se
matrix rownames res = 1 2 3 4 5
matlist res
mat2txt , matrix(res) saving("S13.txt")  replace



